From: Keir Fraser Date: Fri, 12 Sep 2008 09:34:50 +0000 (+0100) Subject: x86: Clean up cpufreq core logic X-Git-Tag: archive/raspbian/4.8.0-1+rpi1~1^2~14110^2~15 X-Git-Url: https://dgit.raspbian.org/%22http://www.example.com/cgi/%22/%22http:/www.example.com/cgi/%22?a=commitdiff_plain;h=ea9aa98036459b0fc3a475cdd20962d23079072a;p=xen.git x86: Clean up cpufreq core logic Clean up cpufreq core logic, which now can cope with cpu online/offline event, and also dynamic platform limitation event (_PPC). Signed-off-by: Liu, Jinsong --- diff --git a/xen/arch/x86/acpi/cpufreq/cpufreq.c b/xen/arch/x86/acpi/cpufreq/cpufreq.c index 47fd7fee5f..a304f352bc 100644 --- a/xen/arch/x86/acpi/cpufreq/cpufreq.c +++ b/xen/arch/x86/acpi/cpufreq/cpufreq.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -44,12 +45,8 @@ #include #include -struct processor_pminfo processor_pminfo[NR_CPUS]; -struct cpufreq_policy xen_px_policy[NR_CPUS]; - -static cpumask_t *cpufreq_dom_pt; -static unsigned long *cpufreq_dom_mask; -static unsigned int cpufreq_dom_max; +/* TODO: change to link list later as domain number may be sparse */ +static cpumask_t cpufreq_dom_map[NR_CPUS]; enum { UNDEFINED_CAPABLE = 0, @@ -335,7 +332,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, if (unlikely(result)) return -ENODEV; - online_policy_cpus = policy->cpus; + cpus_and(online_policy_cpus, cpu_online_map, policy->cpus); next_perf_state = data->freq_table[next_state].index; if (perf->state == next_perf_state) { @@ -390,6 +387,20 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, return result; } +static int acpi_cpufreq_verify(struct cpufreq_policy *policy) +{ + struct acpi_cpufreq_data *data = drv_data[policy->cpu]; + struct processor_performance *perf = &processor_pminfo[policy->cpu].perf; + + if (!policy || !data) + return -EINVAL; + + cpufreq_verify_within_limits(policy, 0, + perf->states[perf->platform_limit].core_frequency * 1000); + + return cpufreq_frequency_table_verify(policy, data->freq_table); +} + static unsigned long acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu) { @@ -441,14 +452,6 @@ acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) perf = data->acpi_data; policy->shared_type = perf->shared_type; - /* - * Currently the latest linux (kernel version 2.6.26) - * still has issue when handle the situation _psd HW_ALL coordination. - * In Xen hypervisor, we handle _psd HW_ALL coordination in same way as - * _psd SW_ALL coordination for the seek of safety. - */ - policy->cpus = perf->shared_cpu_map; - /* capability check */ if (perf->state_count <= 1) { printk("No P-States\n"); @@ -496,6 +499,7 @@ acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) policy->cpuinfo.transition_latency = perf->states[i].transition_latency * 1000; } + policy->governor = CPUFREQ_DEFAULT_GOVERNOR; data->max_freq = perf->states[0].core_frequency * 1000; /* table init */ @@ -554,114 +558,173 @@ err_unreg: return result; } +static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy) +{ + struct acpi_cpufreq_data *data = drv_data[policy->cpu]; + + if (data) { + drv_data[policy->cpu] = NULL; + xfree(data->freq_table); + xfree(data); + } + + return 0; +} + static struct cpufreq_driver acpi_cpufreq_driver = { + .verify = acpi_cpufreq_verify, .target = acpi_cpufreq_target, .init = acpi_cpufreq_cpu_init, + .exit = acpi_cpufreq_cpu_exit, }; -void cpufreq_dom_exit(void) +int cpufreq_limit_change(unsigned int cpu) { - cpufreq_dom_max = 0; - if (cpufreq_dom_mask) - xfree(cpufreq_dom_mask); - if (cpufreq_dom_pt) - xfree(cpufreq_dom_pt); -} + struct processor_performance *perf = &processor_pminfo[cpu].perf; + struct cpufreq_policy *data = cpufreq_cpu_policy[cpu]; + struct cpufreq_policy policy; -int cpufreq_dom_init(void) -{ - unsigned int i; + if (!cpu_online(cpu) || !data) + return -ENODEV; - cpufreq_dom_max = 0; + if ((perf->platform_limit < 0) || + (perf->platform_limit >= perf->state_count)) + return -EINVAL; - for_each_online_cpu(i) { - if (cpufreq_dom_max < processor_pminfo[i].perf.domain_info.domain) - cpufreq_dom_max = processor_pminfo[i].perf.domain_info.domain; - } - cpufreq_dom_max++; + memcpy(&policy, data, sizeof(struct cpufreq_policy)); - cpufreq_dom_mask = xmalloc_array(unsigned long, - BITS_TO_LONGS(cpufreq_dom_max)); - if (!cpufreq_dom_mask) - return -ENOMEM; - bitmap_zero(cpufreq_dom_mask, cpufreq_dom_max); + policy.max = + perf->states[perf->platform_limit].core_frequency * 1000; - cpufreq_dom_pt = xmalloc_array(cpumask_t, cpufreq_dom_max); - if (!cpufreq_dom_pt) - return -ENOMEM; - memset(cpufreq_dom_pt, 0, cpufreq_dom_max * sizeof(cpumask_t)); + return __cpufreq_set_policy(data, &policy); +} - for_each_online_cpu(i) { - __set_bit(processor_pminfo[i].perf.domain_info.domain, cpufreq_dom_mask); - cpu_set(i, cpufreq_dom_pt[processor_pminfo[i].perf.domain_info.domain]); - } +int cpufreq_add_cpu(unsigned int cpu) +{ + int ret = 0; + unsigned int firstcpu; + unsigned int dom; + unsigned int j; + struct cpufreq_policy new_policy; + struct cpufreq_policy *policy; + struct processor_performance *perf = &processor_pminfo[cpu].perf; + + /* to protect the case when Px was not controlled by xen */ + if (!(perf->init & XEN_PX_INIT)) + return 0; - for_each_online_cpu(i) - processor_pminfo[i].perf.shared_cpu_map = - cpufreq_dom_pt[processor_pminfo[i].perf.domain_info.domain]; + if (cpu_is_offline(cpu) || cpufreq_cpu_policy[cpu]) + return -EINVAL; - return 0; -} + ret = px_statistic_init(cpu); + if (ret) + return ret; -static int cpufreq_cpu_init(void) -{ - int i, ret = 0; + dom = perf->domain_info.domain; + if (cpus_weight(cpufreq_dom_map[dom])) { + /* share policy with the first cpu since on same boat */ + firstcpu = first_cpu(cpufreq_dom_map[dom]); + policy = cpufreq_cpu_policy[firstcpu]; + + cpufreq_cpu_policy[cpu] = policy; + cpu_set(cpu, cpufreq_dom_map[dom]); + cpu_set(cpu, policy->cpus); + + printk(KERN_EMERG"adding CPU %u\n", cpu); + } else { + /* for the first cpu, setup policy and do init work */ + policy = xmalloc(struct cpufreq_policy); + if (!policy) { + px_statistic_exit(cpu); + return -ENOMEM; + } + memset(policy, 0, sizeof(struct cpufreq_policy)); - for_each_online_cpu(i) { - xen_px_policy[i].cpu = i; + cpufreq_cpu_policy[cpu] = policy; + cpu_set(cpu, cpufreq_dom_map[dom]); + cpu_set(cpu, policy->cpus); - ret = px_statistic_init(i); + policy->cpu = cpu; + ret = cpufreq_driver->init(policy); if (ret) - return ret; + goto err1; + printk(KERN_EMERG"CPU %u initialization completed\n", cpu); + } - ret = acpi_cpufreq_cpu_init(&xen_px_policy[i]); + /* + * After get full cpumap of the coordination domain, + * we can safely start gov here. + */ + if (cpus_weight(cpufreq_dom_map[dom]) == + perf->domain_info.num_processors) { + memcpy(&new_policy, policy, sizeof(struct cpufreq_policy)); + policy->governor = NULL; + ret = __cpufreq_set_policy(policy, &new_policy); if (ret) - return ret; + goto err2; } - return ret; -} -int cpufreq_dom_dbs(unsigned int event) -{ - unsigned int cpu, dom; - int ret = 0; + return 0; - for (dom = 0; dom < cpufreq_dom_max; dom++) { - if (!test_bit(dom, cpufreq_dom_mask)) - continue; - cpu = first_cpu(cpufreq_dom_pt[dom]); - ret = cpufreq_governor_dbs(&xen_px_policy[cpu], event); - if (ret) - return ret; +err2: + cpufreq_driver->exit(policy); +err1: + for_each_cpu_mask(j, cpufreq_dom_map[dom]) { + cpufreq_cpu_policy[j] = NULL; + px_statistic_exit(j); } + + cpus_clear(cpufreq_dom_map[dom]); + xfree(policy); return ret; } -int acpi_cpufreq_init(void) +int cpufreq_del_cpu(unsigned int cpu) { - int ret = 0; - - /* setup cpumask of psd dom and shared cpu map of cpu */ - ret = cpufreq_dom_init(); - if (ret) - goto err; + unsigned int dom; + struct cpufreq_policy *policy; + struct processor_performance *perf = &processor_pminfo[cpu].perf; - /* setup cpufreq driver */ - cpufreq_driver = &acpi_cpufreq_driver; + /* to protect the case when Px was not controlled by xen */ + if (!(perf->init & XEN_PX_INIT)) + return 0; - /* setup cpufreq infrastructure */ - ret = cpufreq_cpu_init(); - if (ret) - goto err; + if (cpu_is_offline(cpu) || !cpufreq_cpu_policy[cpu]) + return -EINVAL; - /* setup cpufreq dbs according to dom coordiation */ - ret = cpufreq_dom_dbs(CPUFREQ_GOV_START); - if (ret) - goto err; + dom = perf->domain_info.domain; + policy = cpufreq_cpu_policy[cpu]; - return ret; + printk(KERN_EMERG"deleting CPU %u\n", cpu); + + /* for the first cpu of the domain, stop gov */ + if (cpus_weight(cpufreq_dom_map[dom]) == + perf->domain_info.num_processors) + __cpufreq_governor(policy, CPUFREQ_GOV_STOP); + + cpufreq_cpu_policy[cpu] = NULL; + cpu_clear(cpu, policy->cpus); + cpu_clear(cpu, cpufreq_dom_map[dom]); + px_statistic_exit(cpu); + + /* for the last cpu of the domain, clean room */ + /* It's safe here to free freq_table, drv_data and policy */ + if (!cpus_weight(cpufreq_dom_map[dom])) { + cpufreq_driver->exit(policy); + xfree(policy); + } + + return 0; +} + +static int __init cpufreq_driver_init(void) +{ + int ret = 0; + + if ((cpufreq_controller == FREQCTL_xen) && + (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)) + ret = cpufreq_register_driver(&acpi_cpufreq_driver); -err: - cpufreq_dom_exit(); return ret; } +__initcall(cpufreq_driver_init); diff --git a/xen/arch/x86/acpi/cpufreq/cpufreq_ondemand.c b/xen/arch/x86/acpi/cpufreq/cpufreq_ondemand.c index 887acb1a91..f1b676c2f4 100644 --- a/xen/arch/x86/acpi/cpufreq/cpufreq_ondemand.c +++ b/xen/arch/x86/acpi/cpufreq/cpufreq_ondemand.c @@ -238,4 +238,9 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event) break; } return 0; -} +} + +struct cpufreq_governor cpufreq_gov_dbs = { + .name = "ondemand", + .governor = cpufreq_governor_dbs, +}; diff --git a/xen/arch/x86/acpi/cpufreq/powernow.c b/xen/arch/x86/acpi/cpufreq/powernow.c index d4ab452b4c..9c21d038c1 100644 --- a/xen/arch/x86/acpi/cpufreq/powernow.c +++ b/xen/arch/x86/acpi/cpufreq/powernow.c @@ -50,7 +50,7 @@ #define MSR_PSTATE_CUR_LIMIT 0xc0010061 /* pstate current limit MSR */ extern struct processor_pminfo processor_pminfo[NR_CPUS]; -extern struct cpufreq_policy xen_px_policy[NR_CPUS]; +extern struct cpufreq_policy *cpufreq_cpu_policy[NR_CPUS]; struct powernow_cpufreq_data { struct processor_performance *acpi_data; @@ -281,9 +281,9 @@ int powernow_cpufreq_init(void) /* setup cpufreq infrastructure */ for_each_online_cpu(i) { - xen_px_policy[i].cpu = i; + cpufreq_cpu_policy[i]->cpu = i; - ret = powernow_cpufreq_cpu_init(&xen_px_policy[i]); + ret = powernow_cpufreq_cpu_init(cpufreq_cpu_policy[i]); if (ret) goto cpufreq_init_out; } @@ -293,7 +293,7 @@ int powernow_cpufreq_init(void) if (!cpu_isset(dom, dom_mask)) continue; i = first_cpu(pt[dom]); - ret = cpufreq_governor_dbs(&xen_px_policy[i], CPUFREQ_GOV_START); + ret = cpufreq_governor_dbs(cpufreq_cpu_policy[i], CPUFREQ_GOV_START); if (ret) goto cpufreq_init_out; } diff --git a/xen/arch/x86/acpi/cpufreq/utility.c b/xen/arch/x86/acpi/cpufreq/utility.c index 787e91ceef..51194e8990 100644 --- a/xen/arch/x86/acpi/cpufreq/utility.c +++ b/xen/arch/x86/acpi/cpufreq/utility.c @@ -31,47 +31,14 @@ #include #include -struct cpufreq_driver *cpufreq_driver; +struct cpufreq_driver *cpufreq_driver; +struct processor_pminfo processor_pminfo[NR_CPUS]; +struct cpufreq_policy *cpufreq_cpu_policy[NR_CPUS]; /********************************************************************* * Px STATISTIC INFO * *********************************************************************/ -void px_statistic_suspend(void) -{ - int cpu; - uint64_t now; - - now = NOW(); - - for_each_online_cpu(cpu) { - struct pm_px *pxpt = &px_statistic_data[cpu]; - uint64_t total_idle_ns; - uint64_t tmp_idle_ns; - - total_idle_ns = get_cpu_idle_time(cpu); - tmp_idle_ns = total_idle_ns - pxpt->prev_idle_wall; - - pxpt->u.pt[pxpt->u.cur].residency += - now - pxpt->prev_state_wall; - pxpt->u.pt[pxpt->u.cur].residency -= tmp_idle_ns; - } -} - -void px_statistic_resume(void) -{ - int cpu; - uint64_t now; - - now = NOW(); - - for_each_online_cpu(cpu) { - struct pm_px *pxpt = &px_statistic_data[cpu]; - pxpt->prev_state_wall = now; - pxpt->prev_idle_wall = get_cpu_idle_time(cpu); - } -} - void px_statistic_update(cpumask_t cpumask, uint8_t from, uint8_t to) { uint32_t i; @@ -101,7 +68,7 @@ void px_statistic_update(cpumask_t cpumask, uint8_t from, uint8_t to) } } -int px_statistic_init(int cpuid) +int px_statistic_init(unsigned int cpuid) { uint32_t i, count; struct pm_px *pxpt = &px_statistic_data[cpuid]; @@ -123,7 +90,7 @@ int px_statistic_init(int cpuid) memset(pxpt->u.pt, 0, count * (sizeof(struct pm_px_val))); pxpt->u.total = pmpt->perf.state_count; - pxpt->u.usable = pmpt->perf.state_count - pmpt->perf.ppc; + pxpt->u.usable = pmpt->perf.state_count - pmpt->perf.platform_limit; for (i=0; i < pmpt->perf.state_count; i++) pxpt->u.pt[i].freq = pmpt->perf.states[i].core_frequency; @@ -134,7 +101,16 @@ int px_statistic_init(int cpuid) return 0; } -void px_statistic_reset(int cpuid) +void px_statistic_exit(unsigned int cpuid) +{ + struct pm_px *pxpt = &px_statistic_data[cpuid]; + + xfree(pxpt->u.trans_pt); + xfree(pxpt->u.pt); + memset(pxpt, 0, sizeof(struct pm_px)); +} + +void px_statistic_reset(unsigned int cpuid) { uint32_t i, j, count; struct pm_px *pxpt = &px_statistic_data[cpuid]; @@ -184,6 +160,38 @@ int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy, return 0; } +int cpufreq_frequency_table_verify(struct cpufreq_policy *policy, + struct cpufreq_frequency_table *table) +{ + unsigned int next_larger = ~0; + unsigned int i; + unsigned int count = 0; + + if (!cpu_online(policy->cpu)) + return -EINVAL; + + cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq, + policy->cpuinfo.max_freq); + + for (i=0; (table[i].frequency != CPUFREQ_TABLE_END); i++) { + unsigned int freq = table[i].frequency; + if (freq == CPUFREQ_ENTRY_INVALID) + continue; + if ((freq >= policy->min) && (freq <= policy->max)) + count++; + else if ((next_larger > freq) && (freq > policy->max)) + next_larger = freq; + } + + if (!count) + policy->max = next_larger; + + cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq, + policy->cpuinfo.max_freq); + + return 0; +} + int cpufreq_frequency_table_target(struct cpufreq_policy *policy, struct cpufreq_frequency_table *table, unsigned int target_freq, @@ -289,57 +297,51 @@ int __cpufreq_driver_getavg(struct cpufreq_policy *policy) /********************************************************************* - * CPUFREQ SUSPEND/RESUME * + * POLICY * *********************************************************************/ -void cpufreq_suspend(void) -{ - int cpu; - - /* to protect the case when Px was not controlled by xen */ - for_each_online_cpu(cpu) { - struct processor_performance *perf = &processor_pminfo[cpu].perf; - - if (!(perf->init & XEN_PX_INIT)) - return; - } - - cpufreq_dom_dbs(CPUFREQ_GOV_STOP); - - cpufreq_dom_exit(); - - px_statistic_suspend(); -} - -int cpufreq_resume(void) +/* + * data : current policy. + * policy : policy to be set. + */ +int __cpufreq_set_policy(struct cpufreq_policy *data, + struct cpufreq_policy *policy) { - int cpu, ret = 0; - - /* 1. to protect the case when Px was not controlled by xen */ - /* 2. set state and resume flag to sync cpu to right state and freq */ - for_each_online_cpu(cpu) { - struct processor_performance *perf = &processor_pminfo[cpu].perf; - struct cpufreq_policy *policy = &xen_px_policy[cpu]; - - if (!(perf->init & XEN_PX_INIT)) - goto err; - perf->state = 0; - policy->resume = 1; - } + int ret = 0; - px_statistic_resume(); + memcpy(&policy->cpuinfo, &data->cpuinfo, sizeof(struct cpufreq_cpuinfo)); - ret = cpufreq_dom_init(); - if (ret) - goto err; + if (policy->min > data->min && policy->min > policy->max) + return -EINVAL; - ret = cpufreq_dom_dbs(CPUFREQ_GOV_START); + /* verify the cpu speed can be set within this limit */ + ret = cpufreq_driver->verify(policy); if (ret) - goto err; - - return ret; + return ret; + + data->min = policy->min; + data->max = policy->max; + + if (policy->governor != data->governor) { + /* save old, working values */ + struct cpufreq_governor *old_gov = data->governor; + + /* end old governor */ + if (data->governor) + __cpufreq_governor(data, CPUFREQ_GOV_STOP); + + /* start new governor */ + data->governor = policy->governor; + if (__cpufreq_governor(data, CPUFREQ_GOV_START)) { + /* new governor failed, so re-start old one */ + if (old_gov) { + data->governor = old_gov; + __cpufreq_governor(data, CPUFREQ_GOV_START); + } + return -EINVAL; + } + /* might be a policy change, too, so fall through */ + } -err: - cpufreq_dom_exit(); - return ret; + return __cpufreq_governor(data, CPUFREQ_GOV_LIMITS); } diff --git a/xen/arch/x86/acpi/pmstat.c b/xen/arch/x86/acpi/pmstat.c index ec408a2333..e49b82b328 100644 --- a/xen/arch/x86/acpi/pmstat.c +++ b/xen/arch/x86/acpi/pmstat.c @@ -78,7 +78,7 @@ int do_get_pm_info(struct xen_sysctl_get_pmstat *op) tmp_idle_ns = total_idle_ns - pxpt->prev_idle_wall; now = NOW(); - pxpt->u.usable = pmpt->perf.state_count - pmpt->perf.ppc; + pxpt->u.usable = pmpt->perf.state_count - pmpt->perf.platform_limit; pxpt->u.pt[pxpt->u.cur].residency += now - pxpt->prev_state_wall; pxpt->u.pt[pxpt->u.cur].residency -= tmp_idle_ns; pxpt->prev_state_wall = now; diff --git a/xen/arch/x86/acpi/power.c b/xen/arch/x86/acpi/power.c index d9d5647a45..7e96bfc796 100644 --- a/xen/arch/x86/acpi/power.c +++ b/xen/arch/x86/acpi/power.c @@ -133,8 +133,6 @@ static int enter_state(u32 state) freeze_domains(); - cpufreq_suspend(); - disable_nonboot_cpus(); if ( num_online_cpus() != 1 ) { @@ -142,6 +140,8 @@ static int enter_state(u32 state) goto enable_cpu; } + cpufreq_del_cpu(0); + hvm_cpu_down(); acpi_sleep_prepare(state); @@ -189,8 +189,8 @@ static int enter_state(u32 state) BUG(); enable_cpu: + cpufreq_add_cpu(0); enable_nonboot_cpus(); - cpufreq_resume(); thaw_domains(); spin_unlock(&pm_lock); return error; diff --git a/xen/arch/x86/platform_hypercall.c b/xen/arch/x86/platform_hypercall.c index c5747085bb..bb36f8943b 100644 --- a/xen/arch/x86/platform_hypercall.c +++ b/xen/arch/x86/platform_hypercall.c @@ -393,7 +393,6 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xen_platform_op_t) u_xenpf_op) memcpy ((void *)&pxpt->status_register, (void *)&xenpxpt->status_register, sizeof(struct xen_pct_register)); - pxpt->init |= XEN_PX_PCT; } if ( xenpxpt->flags & XEN_PX_PSS ) { @@ -411,7 +410,6 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xen_platform_op_t) u_xenpf_op) break; } pxpt->state_count = xenpxpt->state_count; - pxpt->init |= XEN_PX_PSS; } if ( xenpxpt->flags & XEN_PX_PSD ) { @@ -419,27 +417,34 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xen_platform_op_t) u_xenpf_op) memcpy ((void *)&pxpt->domain_info, (void *)&xenpxpt->domain_info, sizeof(struct xen_psd_package)); - pxpt->init |= XEN_PX_PSD; } if ( xenpxpt->flags & XEN_PX_PPC ) { - pxpt->ppc = xenpxpt->ppc; - pxpt->init |= XEN_PX_PPC; + pxpt->platform_limit = xenpxpt->platform_limit; + + if ( pxpt->init == XEN_PX_INIT ) + { + ret = cpufreq_limit_change(cpuid); + break; + } } - if ( pxpt->init == ( XEN_PX_PCT | XEN_PX_PSS | - XEN_PX_PSD | XEN_PX_PPC ) ) + if ( xenpxpt->flags == ( XEN_PX_PCT | XEN_PX_PSS | + XEN_PX_PSD | XEN_PX_PPC ) ) { - pxpt->init |= XEN_PX_INIT; + pxpt->init = XEN_PX_INIT; cpu_count++; - } - if ( cpu_count == num_online_cpus() ) - { - if ( boot_cpu_data.x86_vendor == X86_VENDOR_AMD ) + + /* Currently we only handle Intel and AMD processor */ + if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL ) + ret = cpufreq_add_cpu(cpuid); + else if ( (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && + (cpu_count == num_online_cpus()) ) ret = powernow_cpufreq_init(); else - ret = acpi_cpufreq_init(); + break; } + break; } diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c index 7e9f81998d..8136a3f0b8 100644 --- a/xen/arch/x86/smpboot.c +++ b/xen/arch/x86/smpboot.c @@ -55,6 +55,7 @@ #include #include #include +#include #define set_kernel_exec(x, y) (0) #define setup_trampoline() (bootsym_phys(trampoline_realmode_entry)) @@ -1232,6 +1233,8 @@ int __cpu_disable(void) mdelay(1); local_irq_disable(); + cpufreq_del_cpu(cpu); + time_suspend(); remove_siblinginfo(cpu); @@ -1421,6 +1424,8 @@ int __devinit __cpu_up(unsigned int cpu) mb(); process_pending_timers(); } + + cpufreq_add_cpu(cpu); return 0; } diff --git a/xen/include/acpi/cpufreq/cpufreq.h b/xen/include/acpi/cpufreq/cpufreq.h index c82ad5b886..59ceac34f5 100644 --- a/xen/include/acpi/cpufreq/cpufreq.h +++ b/xen/include/acpi/cpufreq/cpufreq.h @@ -19,6 +19,8 @@ #define CPUFREQ_NAME_LEN 16 +struct cpufreq_governor; + struct cpufreq_cpuinfo { unsigned int max_freq; unsigned int min_freq; @@ -30,16 +32,21 @@ struct cpufreq_policy { unsigned int shared_type; /* ANY or ALL affected CPUs should set cpufreq */ unsigned int cpu; /* cpu nr of registered CPU */ - struct cpufreq_cpuinfo cpuinfo; /* see above */ + struct cpufreq_cpuinfo cpuinfo; unsigned int min; /* in kHz */ unsigned int max; /* in kHz */ unsigned int cur; /* in kHz, only needed if cpufreq * governors are used */ + struct cpufreq_governor *governor; + unsigned int resume; /* flag for cpufreq 1st run * S3 wakeup, hotplug cpu, etc */ }; -extern struct cpufreq_policy xen_px_policy[NR_CPUS]; +extern struct cpufreq_policy *cpufreq_cpu_policy[NR_CPUS]; + +extern int __cpufreq_set_policy(struct cpufreq_policy *data, + struct cpufreq_policy *policy); #define CPUFREQ_SHARED_TYPE_NONE (0) /* None */ #define CPUFREQ_SHARED_TYPE_HW (1) /* HW does needed coordination */ @@ -64,12 +71,27 @@ struct cpufreq_freqs { #define CPUFREQ_GOV_STOP 2 #define CPUFREQ_GOV_LIMITS 3 +struct cpufreq_governor { + char name[CPUFREQ_NAME_LEN]; + int (*governor)(struct cpufreq_policy *policy, + unsigned int event); +}; + +extern struct cpufreq_governor cpufreq_gov_dbs; +#define CPUFREQ_DEFAULT_GOVERNOR &cpufreq_gov_dbs + /* pass a target to the cpufreq driver */ extern int __cpufreq_driver_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation); extern int __cpufreq_driver_getavg(struct cpufreq_policy *policy); +static __inline__ int +__cpufreq_governor(struct cpufreq_policy *policy, unsigned int event) +{ + return policy->governor->governor(policy, event); +} + /********************************************************************* * CPUFREQ DRIVER INTERFACE * @@ -91,7 +113,50 @@ struct cpufreq_driver { extern struct cpufreq_driver *cpufreq_driver; -void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state); +static __inline__ +int cpufreq_register_driver(struct cpufreq_driver *driver_data) +{ + if (!driver_data || + !driver_data->init || + !driver_data->exit || + !driver_data->verify || + !driver_data->target) + return -EINVAL; + + if (cpufreq_driver) + return -EBUSY; + + cpufreq_driver = driver_data; + return 0; +} + +static __inline__ +int cpufreq_unregister_driver(struct cpufreq_driver *driver) +{ + if (!cpufreq_driver || (driver != cpufreq_driver)) + return -EINVAL; + + cpufreq_driver = NULL; + return 0; +} + +static __inline__ +void cpufreq_verify_within_limits(struct cpufreq_policy *policy, + unsigned int min, unsigned int max) +{ + if (policy->min < min) + policy->min = min; + if (policy->max < min) + policy->max = min; + if (policy->min > max) + policy->min = max; + if (policy->max > max) + policy->max = max; + if (policy->min > policy->max) + policy->min = policy->max; + return; +} + /********************************************************************* * FREQUENCY TABLE HELPERS * @@ -109,6 +174,9 @@ struct cpufreq_frequency_table { int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy, struct cpufreq_frequency_table *table); +int cpufreq_frequency_table_verify(struct cpufreq_policy *policy, + struct cpufreq_frequency_table *table); + int cpufreq_frequency_table_target(struct cpufreq_policy *policy, struct cpufreq_frequency_table *table, unsigned int target_freq, diff --git a/xen/include/acpi/cpufreq/processor_perf.h b/xen/include/acpi/cpufreq/processor_perf.h index f5251a553c..025c123da9 100644 --- a/xen/include/acpi/cpufreq/processor_perf.h +++ b/xen/include/acpi/cpufreq/processor_perf.h @@ -7,26 +7,23 @@ #define XEN_PX_INIT 0x80000000 int get_cpu_id(u8); -int acpi_cpufreq_init(void); int powernow_cpufreq_init(void); void px_statistic_update(cpumask_t, uint8_t, uint8_t); -int px_statistic_init(int); -void px_statistic_reset(int); -void px_statistic_suspend(void); -void px_statistic_resume(void); +int px_statistic_init(unsigned int); +void px_statistic_exit(unsigned int); +void px_statistic_reset(unsigned int); -void cpufreq_dom_exit(void); -int cpufreq_dom_init(void); -int cpufreq_dom_dbs(unsigned int); -void cpufreq_suspend(void); -int cpufreq_resume(void); +int cpufreq_limit_change(unsigned int); + +int cpufreq_add_cpu(unsigned int); +int cpufreq_del_cpu(unsigned int); uint64_t get_cpu_idle_time(unsigned int); struct processor_performance { uint32_t state; - uint32_t ppc; + uint32_t platform_limit; struct xen_pct_register control_register; struct xen_pct_register status_register; uint32_t state_count; diff --git a/xen/include/public/platform.h b/xen/include/public/platform.h index 742c0fc1c2..eee047be41 100644 --- a/xen/include/public/platform.h +++ b/xen/include/public/platform.h @@ -289,7 +289,7 @@ struct xen_psd_package { struct xen_processor_performance { uint32_t flags; /* flag for Px sub info type */ - uint32_t ppc; /* Platform limitation on freq usage */ + uint32_t platform_limit; /* Platform limitation on freq usage */ struct xen_pct_register control_register; struct xen_pct_register status_register; uint32_t state_count; /* total available performance states */